Time Series

Time Series

Chapter 11

Lines for time series, w/ time on the x-axis

Multiple time series

# data from: https://www.freddiemac.com/pmms/docs/historicalweeklydata.xls
library(tidyverse)
df <- read_csv("historicalmortgage.csv")
df <- df |> 
  pivot_longer(cols = -DATE, names_to = "TYPE", values_to = "RATE") |> 
  mutate(TYPE = forcats::fct_reorder2(TYPE, DATE, RATE)) # puts legend in correct order

ggplot(df, aes(DATE, RATE, color = TYPE)) +
  geom_line() +
  ggtitle("U.S. Mortgage Rates") +
  labs (x = "", y = "percent") +
  theme_grey(16) +
  theme(legend.title = element_blank())

ggplot(df, aes(DATE, RATE, fill = TYPE)) +
  geom_col() +
  ggtitle("U.S. Mortgage Rates", subtitle = "Not recommended!")

Filter

library(lubridate)
df2020 <- df |>
  filter(year(DATE) == 2020)
ggplot(df2020, aes(DATE, RATE, color = TYPE)) +
  geom_line() +
  ggtitle("U.S. Mortgage Rates")

Overall long-term trend (secular)

dfman <- read_csv("ManchesterByTheSea.csv")
g <- ggplot(dfman, aes(Date, Gross)) +
  geom_line() +
  ggtitle("Manchester by the Sea", "Daily Gross (US$), United States") +
  xlab("2016-2017")
g

Loess smoother (non-parametric)

dfman$Group <- round(1:nrow(dfman)/(nrow(dfman)/5))
g <- ggplot(dfman, aes(Date, Gross)) +
  geom_point()
g +
  geom_line(color = "grey50") +
  geom_smooth(method = "loess", se = FALSE, lwd =1.5) +
  ggtitle("Loess smoother")

Loess smoother (non-parametric)

g +
  geom_line(color = "grey50") +
  geom_smooth(aes(group = Group), method = 'lm', color = "red", se = FALSE) + 
  geom_smooth(color = "blue", se = FALSE)

Under / overfitting

Default smoothing parameter .75

g + geom_smooth(method = "loess", span = .75, se = FALSE)

Change smoothing parameter .1

g + geom_smooth(method = "loess", span = .1, se = FALSE)

Change smoothing parameter .5

g + geom_smooth(method = "loess", span = .5, se = FALSE)

Change smoothing parameter .9

g + geom_smooth(method = "loess", span = .9, se = FALSE)

Are there cyclical patterns?

ggplot(dfman, aes(Date, Gross)) + geom_line() + geom_point()

library(plotly)
p <- plot_ly(
    dfman, x = ~Date, y = ~Gross,
    type = 'scatter',
    mode = 'lines+markers',
    # Hover text:
    hoverinfo = 'text',
    text = ~paste(Day)
)
p

Mark the pattern

g <- ggplot(dfman, aes(Date, Gross)) + geom_line() +
    ggtitle("Manchester by the Sea",
            "Daily Gross, United States")
saturday <- dfman |> filter(wday(Date) == 7)
g + geom_point(data = saturday, aes(Date, Gross),
               color = "deeppink")

Facet to test (remove) cyclical pattern

# facets
ggplot(dfman, aes(Date, Gross)) +
    geom_line() +
    facet_grid(wday(Date, label = TRUE)~.)

Another view

ggplot(dfman, aes(Date, Gross)) +
    geom_line(color = "grey30") + geom_point(size = 1) +
    facet_grid(.~wday(Date, label = TRUE))

With smoother

ggplot(dfman, aes(Date, Gross)) +
    geom_line(color = "grey30") + geom_point(size = 1) +
    facet_grid(.~wday(Date, label = TRUE)) +
    geom_smooth(se = FALSE)

Air Passengers

plot(AirPassengers)

monthplot()

monthplot(AirPassengers)

What about the abnormalities?

Label by day of week

christmas <- dfman |>
    filter(Date >= as.Date("2016-12-20") &
               Date <= ("2017-01-03"))

ggplot(christmas, aes(Date, Gross)) +
    geom_label(aes(label = wday(Date, label = TRUE))) +
    geom_line(color = "cornflowerblue") + 
    scale_x_date(date_labels = "%b\n%d",
                 date_breaks = "1 day")

Different pattern for Christmas Week

Label by day of month

ggplot(christmas, aes(Date, Gross/1000000)) +
    geom_line(color = "cornflowerblue", lwd = 1.1) + 
    geom_point(color = "cornflowerblue", size = 2) +
    geom_label(data = christmas, 
               aes(x = Date, y = Gross/1000000 + .06, 
                   label = day(Date))) +
    scale_x_date(date_labels = "%a",
                 date_breaks = "1 day") +
    ggtitle("Manchester by the Sea",
            "Chistmas Week Box Office Gross") +
    labs(x = "Dec 2016 - Jan 2017", 
         y = "Daily Gross (in millions $US)") +
    theme_grey(14)

Highlight the abnormality

# annotate Christmas Week
start <- as.Date("2016-12-24")
end <- as.Date("2017-01-02")
g + annotate("rect", xmin = start, xmax = end,
             ymin = -Inf, ymax = Inf, fill = "green",
             alpha = .2) +
    annotate("text", x = end + 2,
             y = 1500000, label = "Dec 24 - Jan 2",
             color = "green", hjust = 0) +
    theme_classic()

Comparisons (multi-line plots)

set.seed(5702)
tidydf <- data.frame(time = 1:6, y1 = round(rnorm(6, 100, 50), 2),
           y2 = round(rnorm(6, 10, 5), 2)) |> 
  pivot_longer(cols = -time)
ggplot(tidydf, aes(time, value, col = name)) +
  geom_line()

Scale the data (create an index)

Each value is divided by the first value for that group and multiplied by 100:

tidydf
time name value
1 y1 90.26
1 y2 8.12
2 y1 107.48
2 y2 8.55
3 y1 110.28
3 y2 12.83
4 y1 81.77
4 y2 2.01
5 y1 130.62
5 y2 11.68
6 y1 41.43
6 y2 16.49
tidydf <- tidydf |> group_by(name) |>
  mutate(index = round(100*value/value[1], 2)) |> 
  ungroup()
tidydf
time name value index
1 y1 90.26 100.00
1 y2 8.12 100.00
2 y1 107.48 119.08
2 y2 8.55 105.30
3 y1 110.28 122.18
3 y2 12.83 158.00
4 y1 81.77 90.59
4 y2 2.01 24.75
5 y1 130.62 144.72
5 y2 11.68 143.84
6 y1 41.43 45.90
6 y2 16.49 203.08
ggplot(tidydf, aes(time, index, col = name)) +
  geom_line()

Discrete data

set.seed(5702)
day <- 1:31
number <- 10 * (day - 14)^2 + 2000 + rnorm(1:31, 0, 400)
df <- data.frame(day, number)
ggplot(df, aes(day, number)) +
  geom_line(color = "deeppink") +
  geom_point(color = "deeppink") +
  scale_x_continuous(breaks = 1:31) +
  scale_y_continuous(limits = c(0, 5000)) +
  ggtitle("Average Motor Vehicle Inspections per Day",
          subtitle = "(fake data)") +
  labs(x = "day of month", y="number of inspections") +
  theme(plot.title = element_text(size = 16))

Better for individual values

ggplot(df, aes(day, number)) +
  geom_col() +
  scale_x_continuous(breaks = 1:31) +
  ggtitle("Average Motor Vehicle Inspections per Day") +
  labs(x = "day of month", y="number of inspections") +
  theme(plot.title = element_text(size = 16))

Gaps

# read file
mydat <- read_csv("WA_Sales_Products_2012-14.csv") |> 
  mutate(Revenue = Revenue/1000000)

# convert Quarter to a single numeric value Q
mydat$Q <- as.numeric(substr(mydat$Quarter, 2, 2))

# convert Q to end-of-quarter date
mydat$Date <- as.Date(paste0(mydat$Year, "-",
                             as.character(mydat$Q*3),
                             "-30"))

# Check that dates look ok
# unique(mydat$Date)
Methoddata <- mydat |> group_by(Date, `Order method type`) |>
    summarize(Revenue = sum(Revenue))

g <- ggplot(Methoddata, aes(Date, Revenue,
                            color = `Order method type`)) +
    geom_line(aes(group = `Order method type`)) +
    scale_x_date(limits = c(as.Date("2012-02-01"), as.Date("2014-12-31")),
        date_breaks = "6 months", date_labels = "%b %Y") +
  ylab("Revenue in mil $")
g

Add points to show the frequency of the data

g + geom_point()

Another example

No2013 <- Methoddata |> filter(year(Date) != 2013)
g <- ggplot(No2013, aes(Date, Revenue,
                            color = `Order method type`)) +
    geom_line(aes(group = `Order method type`)) +
    scale_x_date(limits = c(as.Date("2012-02-01"), as.Date("2014-12-31")),
        date_breaks = "6 months", date_labels = "%b %Y") +
  ylab("Revenue in mil $")
g

Add points to show the frequency of the data

g + geom_point()

Another option: leave gaps

(set missing values to NA)

Methoddata$Date[year(Methoddata$Date)==2013] <- NA
g <- ggplot(Methoddata, aes(Date, Revenue,
                            color = `Order method type`)) + 
    geom_path(aes(group = `Order method type`)) +
    scale_x_date(limits = c(as.Date("2012-02-01"), as.Date("2014-12-31")),
        date_breaks = "6 months", date_labels = "%b %Y") +
  ylab("Revenue in mil $")
g